Code
# libraries
library(dplyr)
library(ggplot2)
library(forcats)
# source functions
names_functions = list.files(here::here("functions"))
for (f in names_functions)
source(here::here("functions", f))
rm(f, names_functions)This notebook constructs the priors for the model estimation. See model_description.htmlfor details. Some of the priors are independent of the specific scenario being estimated, e.g. the innovations to the reverse random-walk \boldsymbol{W}; others depend on the scenario and the resulting fundamental forecast, e.g. the prior mean m_{\mu_T} and variance V_{\mu_T} on the (transformed) latent voting intentions on election day.
# libraries
library(dplyr)
library(ggplot2)
library(forcats)
# source functions
names_functions = list.files(here::here("functions"))
for (f in names_functions)
source(here::here("functions", f))
rm(f, names_functions)Initialize empty list to store priors for different scenarios
priors <- list() Load states
states <- load_states()Load number of parties running in each state -> needed to set the prior on \mu_T
n_parties_by_state <- load_n_parties_by_geography("state")Load election results -> needed to estimate \boldsymbol{W}
election_results <- load_election_vote_shares()
n_elections <- length(unique(election_results$year))Calculate the dim names of \mu_T, i.e. the state and party that each entry refers to. These are also the dimnames of \boldsymbol{W} and V_{\mu_T}. These serve as a cross-check that the priors are ordered in the same way as in the data preparation and estimation!
names_mmu_T <- c()
parties <- load_parties()
for (s in seq(1, length(n_parties_by_state))) {
tmp <- paste0(names(n_parties_by_state)[s], "_")
for (p in seq(1, n_parties_by_state[s] - 1)) {
names_mmu_T <- c(names_mmu_T, paste0(tmp, parties[p]))
}
}Load the list of states in which historically one party has been the clear winner
dominated_states <- readRDS(here::here("fundamental_forecast", "dominated_states.rds"))To specify the “prior” on the innovations to the reverse random-walk, it is useful to decompose the covariance as
\boldsymbol{W} = \kappa \times \boldsymbol{\hat{W}}
where \boldsymbol{\hat{W}} is a correlation matrix and \kappa a scale factor. Intuitively, \boldsymbol{\hat{W}} accounts for the comovement of the changes in latent voting intentions across parties and states while \kappa governs how large changes in voting intentions are!
To estimate \boldsymbol{\hat{W}} I consider the correlation of historical election outcomes across parties and states. These correlations are far from a perfect measure of what I am after because there is no variation in vote shares/intentions within an election campaign. But using the historic results has the advantage of being straight-forward to calculate!
An additional source of information could be the correlation across parties and states of polls from previous elections but it’s less obvious to me how these would need to be handled. Aggregated over time in some way? Does it make sense to compare a poll in Amperville on May 5th in 1984 with one in Circuiton on June 7th in 2023? What does that say about the evolution of the underlying voting intentions from day to day? Also, polls don’t just track the voting intentions but also noise like house effects. Although these might be constant from day to day so actual movement in polls would be down to changes in voting intentions. On the whole, I am not sure how useful these data are or at least how I could extract useful information from them for my goal.
Lastly, demographic information like ethnicity could be useful in specifying the correlation of changes in voting intentions between states. But similarities or differences in ethnicities across states don’t necessarily reveal anything about correlations between parties.
When calculating the correlation of latent vote share across states and parties, I use all the available historical election results since
[…] although parties’ vote shares in each province oscillate from year to year—possibly for quantifiable reasons like the strength of the economy, and possibly for unquantifiable ones like the comparative appeal of their candidates or of their proposed policies—the baseline popularity of each party in each province is constant all the way from 1984 to 2024, as are the characteristics of each pollster. There is no equivalent in Dataland of, say, West Virginia in the United States shifting over time from a reliably Democratic-voting state to a reliably Republican-voting one, because there are no long-run time trends. As a result, when predicting elections in 2024, you should treat historical results and polls from 1984 as being just as informative as those from 2023 are. (background material on Dataland, my emphasis)
Plot historical election results
election_results %>%
tidyr::pivot_longer(
cols = c("cc", "dgm", "pdal", "ssp"),
names_to = "party",
values_to = "vote_share"
) %>%
filter(vote_share != 0) %>%
ggplot(aes(
x = year,
y = vote_share,
color = party
)
) +
geom_point() +
facet_wrap(~state) +
ggsci::scale_color_jco()# calculate correlation acroos elections
mat_res <- calc_cor_election_results(election_results)
corr_mat <- cor(mat_res)Given the large amount of parameters I estimate and the relatively short sample, I also consider a James-Stein-type shrinkage estimator of the correlation matrix. The degree of shrinkage is estimated from the data (see the documentation of cor.shrink). Shrinking the correlation coefficients may improve the performance of the model by dampening the estimation uncertainty.
corr_mat_shrink <- corpcor::cor.shrink(mat_res)Estimating optimal shrinkage intensity lambda (correlation matrix): 0.2019
# manually convert to matrix class first
class(corr_mat_shrink) <- "matrix"Use shrunk correlation matrix!
W_hat <- corr_mat_shrinkkappa <- 0.01W <- kappa * W_hat
priors[["A"]][["W"]] <-
priors[["B"]][["W"]] <-
priors[["C"]][["W"]] <-
priors[["D"]][["W"]] <-
priors[["E"]][["W"]] <- Wsig_ddelta <- 0.01 # specify in terms of standard deviation for Stan!Store in list
priors[["A"]][["sig_ddelta"]] <-
priors[["B"]][["sig_ddelta"]] <-
priors[["C"]][["sig_ddelta"]] <-
priors[["D"]][["sig_ddelta"]] <-
priors[["E"]][["sig_ddelta"]] <- sig_ddeltascenarios <- load_scenarios()Load fundamental forecast
df_fcast <- readRDS(here::here("fundamental_forecast", "fundamental_forecast.rds"))Loop over scenarios, convert to log ratio scale and store in list
for (scen in scenarios) {
df_fcast %>%
inner_join(
load_dataland_states_regions(),
by = join_by(province == state)) %>%
filter(scenario == scen) %>%
select(-scenario) %>%
mutate(party = toupper(party)) %>%
group_by(province) %>%
mutate(vote_share_lr = additive_log_ratio(vote_share)) %>%
filter(vote_share_lr != 0) %>%
arrange(region, province) %>%
tidyr::unite(
name,
province,
party,
sep = "_") -> df_m_mmu_T
# check calc
stopifnot(all(df_m_mmu_T$name == names_mmu_T))
# store in list
priors[[scen]][["m_mmu_T"]] <- df_m_mmu_T$vote_share_lr
names(priors[[scen]][["m_mmu_T"]]) <- names_mmu_T
rm(df_m_mmu_T)
}The prior variance can vary across scenarios to reflect how far advanced the campaign is.
In addition, it can also be set to a smaller value - placing greater weight on the fundamental forecast - in those states where within a given scenario fewer or no polls are available
Or it could also be made much tighter to observe the fact that in previous elections (not used in the estimation!) there were certain parties clearly dominating in some states and that in the absence of further evidence this dominance seems like to hold.
Scale elements of V_{\mu_T} down if party is “dominating”
var_V_mmu_T <- 1
scale_dominated_states <- 0.1
diag_V_mmu_T <- c()
for (s in states) {
if (s %in% dominated_states) {
diag_V_mmu_T <- c(
diag_V_mmu_T,
rep(
scale_dominated_states * var_V_mmu_T,
n_parties_by_state[s] - 1)
)
} else {
diag_V_mmu_T <- c(
diag_V_mmu_T,
rep(var_V_mmu_T, n_parties_by_state[s] - 1)
)
}
}
V_mmu_T <- diag(diag_V_mmu_T)
dimnames(V_mmu_T) <- list(names_mmu_T, names_mmu_T)Scale variance to account for differences in data availability in the scenarios
scale_scenarios <- c(
"A" = 1, # complete data up until election day
"B" = 0.2, # very few polls, only in April, many regions and states without any
"C" = 0.5, # polls up until May, almost all regions and states covered but mostly national
"D" = 1, # similar to A
"E" = 0.5 # similar to C
)Store in list
for (scen in scenarios) {
priors[[scen]][["V_mmu_T"]] <- V_mmu_T * scale_scenarios[scen]
}saveRDS(priors, file = here::here("priors", "priors.Rds"))
rm(priors)To evaluate if the joint prior distribution specified above translates into reasonable prior beliefs for the expected voting intentions or observed polls, I construct samples from the prior predictive distribution in the different scenarios.
This is similar in spirit to the prior predictive distribution which samples the data conditional on the prior. For definition and background see one’s favorite textbook on Bayesian statistics, Wiki or Stan docs.
Two questions are of interest:
does the joint prior imply reasonable ranges for the expected vote share both across parties and across time?
is the size of the house effects plausible?
priors <- readRDS(file = here::here("priors", "priors.Rds"))n_draws <- 500
n_periods <- length(
load_dates_election_campaign()
)Function to obtain a draw from the joint prior
simulate_mmu_ppi <- function(
n_periods,
m_mmu_T,
V_mmu_T,
W,
sig_ddelta
) {
# draw mmu_T
mmu_T <- MASS::mvrnorm(
1,
mu = m_mmu_T,
Sigma = V_mmu_T
)
# draw mmu
W <- W
mmu <- matrix(
NA,
nrow = length(mmu_T),
ncol = n_periods
)
mmu[, n_periods] <- mmu_T
for (t in seq(n_periods - 1, 1, by = -1)) {
mmu[, t] = mmu[, t + 1] +
MASS::mvrnorm(1,
mu = rep(0, nrow(W)),
Sigma = W
)
}
# convert mmu to df
dimnames(mmu) <- list(names_mmu_T, seq(1, n_periods))
mmu %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "tmp") %>%
tidyr::pivot_longer(
cols = -tmp,
names_to = "t",
values_to = "mmu"
) %>%
tidyr::separate(tmp, into = c("state", "party"), sep = "_") -> df_tmp
# fill in "missing" parties with a value of 0
df_st_pa <- data.frame()
for (s in states) {
df_st_pa <- rbind(
df_st_pa,
data.frame(
state = s,
party = parties[1:n_parties_by_state[s]]
)
)
}
do.call("rbind",
lapply(
seq(1, n_periods),
function(t) {
df_tmp <- df_st_pa
df_tmp$t = t
df_tmp$ppi = NA
df_tmp
}
)) -> df_draw
df_draw <- merge(
df_draw,
df_tmp,
by = c("state", "party", "t"),
all.x = TRUE
)
# invert log ratio transformation
df_draw %>%
mutate(mmu = ifelse(
is.na(mmu),
0,
mmu
)
) %>%
group_by(t, state) %>%
mutate(
ppi = inv_additive_log_ratio(mmu),
mmu_plus_house = mmu + rnorm(n = n(), sd = sig_ddelta),
ttheta = inv_additive_log_ratio(mmu_plus_house)
) -> df_draw
return(df_draw)
}Functions to plot \pi and \theta
plt_draws_ppi <- function(
df_draws,
plt_title,
plt_caption
) {
df_draws %>%
mutate(
ppi = ifelse(
ppi == 0.0,
NA,
ppi
)
) %>%
group_by(
t,
state,
party
) %>%
summarise(
mn = mean(ppi, na.rm = T),
q_95_upp = quantile(
ppi,
prob = c(0.025),
na.rm = T
),
q_95_low = quantile(
ppi,
prob = c(1-0.025),
na.rm = T
),
q_83_upp = quantile(
ppi,
prob = c(0.0855),
na.rm = T
),
q_83_low = quantile(
ppi,
prob = c(1-0.085),
na.rm = T
)
) %>%
ungroup() %>%
ggplot(aes(x = t))+
geom_line(aes(y = mn, color = party), linetype = "dashed", linewidth= 1.0)+
geom_ribbon(aes(ymin = q_95_low, ymax = q_95_upp, fill = party), alpha = 0.2)+
geom_ribbon(aes(ymin = q_83_low, ymax = q_83_upp, fill = party), alpha = 0.3)+
ggsci::scale_color_jco() +
ggsci::scale_fill_jco() +
facet_wrap(~state)+
labs(
x = "",
y = "",
title = plt_title,
caption = plt_caption
) -> p
return(p)
}
plt_house_effects <- function(
df_draws,
plt_title,
plt_caption
) {
df_draws %>%
mutate(
house_effects = ttheta - ppi
) %>%
ggplot(aes(x = house_effects, fill = party))+
geom_histogram(
binwidth = 0.001,
position = "identity",
alpha = 0.3)+
ggsci::scale_fill_jco() +
facet_wrap(~state)+
labs(
x = "",
y = "",
title = plt_title,
caption = plt_caption
) -> p
return(p)
}Function to simulate prior for a given scenario
simulate_prior <- function(
scen,
n_draws = 1000,
n_periods = 100
) {
do.call(
"rbind",
lapply(
seq(1, n_draws),
function(draw) {
df_draw <- simulate_mmu_ppi(
n_periods = n_periods,
priors[[scen]]$m_mmu_T,
priors[[scen]]$V_mmu_T,
priors[[scen]]$W,
priors[[scen]]$sig_ddelta
)
df_draw$draw = draw
df_draw
}
)) -> df_draws
df_draws$scenario = scen
# plot ppi
plt_ppi <- plt_draws_ppi(
df_draws,
plt_title = paste0(scen, ": Expected vote share"),
plt_caption = "Simulated prior distribution.")
plt_house <- plt_house_effects(
df_draws,
plt_title = paste0(scen, ": House effects"),
plt_caption = "Simulated prior distribution: difference between theta and pi (see model description).")
return(list(
df_draws = df_draws,
plt_ppi = plt_ppi,
plt_house = plt_house
)
)
}